In [ ]:
from google.colab import drive
drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
In [ ]:
# @title S1
import torch
BATCH_SIZE = 8 # Increase / decrease according to GPU memeory.
RESIZE_TO = 640 # Resize the image for training and transforms.
NUM_EPOCHS = 50 # Number of epochs to train for.
NUM_WORKERS = 4 # Number of parallel workers for data loading.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Training images and labels files directory.
# TRAIN_DIR = "/content/drive/MyDrive/Colab Notebooks/data/train"
TRAIN_DIR = "/content/drive/MyDrive/BIOMA_VISION_TAREAS/TRABAJO/african-wildlife/data/train"
# /content/drive/MyDrive/BIOMA_VISION_TAREAS/TRABAJO/african-wildlife/data/test
# Validation images and labels files directory.
# VALID_DIR = "/content/drive/MyDrive/Colab Notebooks/data/valid"
VALID_DIR = "/content/drive/MyDrive/BIOMA_VISION_TAREAS/TRABAJO/african-wildlife/data/valid"
# Classes: 0 index is reserved for background.
CLASSES = ["__background__", "buffalo", "elephant", "rhino", "zebra"]
NUM_CLASSES = len(CLASSES)
# Whether to visualize images after crearing the data loaders.
VISUALIZE_TRANSFORMED_IMAGES = True
# Location to save model and plots.
OUT_DIR = "outputs"
In [ ]:
# @title S2
!pip install config
Requirement already satisfied: config in /usr/local/lib/python3.11/dist-packages (0.5.1)
In [ ]:
# @title S3
def collate_fn(batch):
"""
To handle the data loading as different images may have different
numbers of objects, and to handle varying-size tensors as well.
"""
return tuple(zip(*batch))
def get_train_transform():
# We keep "pascal_voc" because bounding box format is [x_min, y_min, x_max, y_max].
return A.Compose(
[
A.HorizontalFlip(p=0.5),
A.VerticalFlip(p=0.5),
A.Rotate(limit=45),
A.Blur(blur_limit=3, p=0.2),
A.MotionBlur(blur_limit=3, p=0.1),
A.MedianBlur(blur_limit=3, p=0.1),
A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.3),
A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, p=0.3),
A.RandomScale(scale_limit=0.2, p=0.3),
ToTensorV2(p=1.0),
],
bbox_params={"format": "pascal_voc", "label_fields": ["labels"]},
)
def get_valid_transform():
return A.Compose(
[
ToTensorV2(p=1.0),
],
bbox_params={"format": "pascal_voc", "label_fields": ["labels"]},
)
In [ ]:
# @title S4
import torch
import cv2
import numpy as np
import os
import glob
#from config import CLASSES, RESIZE_TO, TRAIN_DIR, BATCH_SIZE
from torch.utils.data import Dataset, DataLoader
#from custom_utils import collate_fn, get_train_transform, get_valid_transform
class CustomDataset(Dataset):
def __init__(self, dir_path, width, height, classes, transforms=None):
"""
:param dir_path: Directory containing 'images/' and 'labels/' subfolders.
:param width: Resized image width.
:param height: Resized image height.
:param classes: List of class names (or an indexing scheme).
:param transforms: Albumentations transformations to apply.
"""
self.transforms = transforms
self.dir_path = dir_path
self.image_dir = os.path.join(self.dir_path, "images")
self.label_dir = os.path.join(self.dir_path, "labels")
self.width = width
self.height = height
self.classes = classes
# Gather all image paths
self.image_file_types = ["*.jpg", "*.jpeg", "*.png", "*.ppm", "*.JPG"]
self.all_image_paths = []
for file_type in self.image_file_types:
self.all_image_paths.extend(glob.glob(os.path.join(self.image_dir, file_type)))
# Sort for consistent ordering
self.all_image_paths = sorted(self.all_image_paths)
self.all_image_names = [os.path.basename(img_p) for img_p in self.all_image_paths]
def __len__(self):
return len(self.all_image_paths)
def __getitem__(self, idx):
# 1) Read image
image_name = self.all_image_names[idx]
image_path = os.path.join(self.image_dir, image_name)
label_filename = os.path.splitext(image_name)[0] + ".txt"
label_path = os.path.join(self.label_dir, label_filename)
image = cv2.imread(image_path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
# 2) Resize image (to the model's expected size)
image_resized = cv2.resize(image, (self.width, self.height))
image_resized /= 255.0 # Scale pixel values to [0, 1]
# 3) Read bounding boxes (normalized) from .txt file
boxes = []
labels = []
if os.path.exists(label_path):
with open(label_path, "r") as f:
lines = f.readlines()
for line in lines:
line = line.strip()
if not line:
continue
# Format: class_id x_min y_min x_max y_max (all in [0..1])
parts = line.split()
class_id = int(parts[0]) # e.g. 0, 1, 2, ...
xmin = float(parts[1])
ymin = float(parts[2])
xmax = float(parts[3])
ymax = float(parts[4])
# Example: if you want class IDs to start at 1 for foreground
# and background=0, do:
label_idx = class_id + 1
# Convert normalized coords to absolute (in resized space)
x_min_final = xmin * self.width
y_min_final = ymin * self.height
x_max_final = xmax * self.width
y_max_final = ymax * self.height
# Ensure valid box
if x_max_final <= x_min_final:
x_max_final = x_min_final + 1
if y_max_final <= y_min_final:
y_max_final = y_min_final + 1
# Clip if out of bounds
x_min_final = max(0, min(x_min_final, self.width - 1))
x_max_final = max(0, min(x_max_final, self.width))
y_min_final = max(0, min(y_min_final, self.height - 1))
y_max_final = max(0, min(y_max_final, self.height))
boxes.append([x_min_final, y_min_final, x_max_final, y_max_final])
labels.append(label_idx)
# 4) Convert boxes & labels to Torch tensors
if len(boxes) == 0:
boxes = torch.zeros((0, 4), dtype=torch.float32)
labels = torch.zeros((0,), dtype=torch.int64)
else:
boxes = torch.tensor(boxes, dtype=torch.float32)
labels = torch.tensor(labels, dtype=torch.int64)
# 5) Prepare the target dict
area = (
(boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
if len(boxes) > 0
else torch.tensor([], dtype=torch.float32)
)
iscrowd = torch.zeros((len(boxes),), dtype=torch.int64)
image_id = torch.tensor([idx])
target = {"boxes": boxes, "labels": labels, "area": area, "iscrowd": iscrowd, "image_id": image_id}
# 6) Albumentations transforms: pass Python lists, not Tensors
if self.transforms:
bboxes_list = boxes.cpu().numpy().tolist() # shape: list of [xmin, ymin, xmax, ymax]
labels_list = labels.cpu().numpy().tolist() # shape: list of ints
transformed = self.transforms(
image=image_resized,
bboxes=bboxes_list,
labels=labels_list,
)
# Reassign the image
image_resized = transformed["image"]
# Convert bboxes back to Torch Tensors
new_bboxes_list = transformed["bboxes"] # list of [xmin, ymin, xmax, ymax]
new_labels_list = transformed["labels"] # list of int
if len(new_bboxes_list) > 0:
new_bboxes = torch.tensor(new_bboxes_list, dtype=torch.float32)
new_labels = torch.tensor(new_labels_list, dtype=torch.int64)
else:
new_bboxes = torch.zeros((0, 4), dtype=torch.float32)
new_labels = torch.zeros((0,), dtype=torch.int64)
target["boxes"] = new_bboxes
target["labels"] = new_labels
return image_resized, target
In [ ]:
# @title S5
# ---------------------------------------------------------
# Debug/demo if run directly
# ---------------------------------------------------------
if __name__ == "__main__":
# Example usage with no transforms for debugging
dataset = CustomDataset(dir_path=TRAIN_DIR, width=RESIZE_TO, height=RESIZE_TO, classes=CLASSES, transforms=None)
print(f"Number of training images: {len(dataset)}")
if len(dataset) > 0:
from google.colab.patches import cv2_imshow # Import cv2_imshow
def visualize_sample(image, target):
"""
Visualize a single sample using OpenCV. Expects
`image` as a NumPy array of shape (H, W, 3) in [0..1].
"""
# Convert [0,1] float -> [0,255] uint8
img = (image * 255).astype(np.uint8)
# Convert RGB -> BGR
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
boxes = target["boxes"].cpu().numpy().astype(np.int32)
labels = target["labels"].cpu().numpy().astype(np.int32)
for i, box in enumerate(boxes):
x1, y1, x2, y2 = box
class_idx = labels[i]
# If your class_idx starts at 1 for "first class", ensure you handle that:
# e.g. if CLASSES = ["background", "class1", "class2", ...]
if 0 <= class_idx < len(CLASSES):
class_str = CLASSES[class_idx]
else:
class_str = f"Label_{class_idx}"
cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
cv2.putText(
img, class_str, (x1, max(y1 - 5, 0)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2
)
cv2_imshow(img) # Use cv2_imshow instead of cv2.imshow
cv2.waitKey(0)
# Visualize a few samples
NUM_SAMPLES_TO_VISUALIZE = 10
for i in range(min(NUM_SAMPLES_TO_VISUALIZE, len(dataset))): # Also adjust loop range
image, target = dataset[i] # No transforms in this example
# `image` is shape (H, W, 3) in [0..1]
print(f"Visualizing sample {i}, boxes: {target['boxes'].shape[0]}")
visualize_sample(image, target)
cv2.destroyAllWindows()
else:
print("Dataset is empty. Cannot visualize samples.")
Number of training images: 1052 Visualizing sample 0, boxes: 1
Visualizing sample 1, boxes: 3
Visualizing sample 2, boxes: 2
Visualizing sample 3, boxes: 1
Visualizing sample 4, boxes: 1
Visualizing sample 5, boxes: 1
Visualizing sample 6, boxes: 1
Visualizing sample 7, boxes: 1
Visualizing sample 8, boxes: 1
Visualizing sample 9, boxes: 4
In [ ]:
# @title S6
def create_train_loader(train_dataset, num_workers=0):
train_loader = DataLoader(
train_dataset,
batch_size=BATCH_SIZE,
shuffle=True,
num_workers=num_workers,
collate_fn=collate_fn,
drop_last=True,
)
return train_loader
def create_valid_loader(valid_dataset, num_workers=0):
valid_loader = DataLoader(
valid_dataset,
batch_size=BATCH_SIZE,
shuffle=False,
num_workers=num_workers,
collate_fn=collate_fn,
drop_last=True,
)
return valid_loader
In [ ]:
# @title S7
!pip install torchmetrics
Requirement already satisfied: torchmetrics in /usr/local/lib/python3.11/dist-packages (1.8.1) Requirement already satisfied: numpy>1.20.0 in /usr/local/lib/python3.11/dist-packages (from torchmetrics) (2.0.2) Requirement already satisfied: packaging>17.1 in /usr/local/lib/python3.11/dist-packages (from torchmetrics) (25.0) Requirement already satisfied: torch>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from torchmetrics) (2.6.0+cu124) Requirement already satisfied: lightning-utilities>=0.8.0 in /usr/local/lib/python3.11/dist-packages (from torchmetrics) (0.15.2) Requirement already satisfied: setuptools in /usr/local/lib/python3.11/dist-packages (from lightning-utilities>=0.8.0->torchmetrics) (75.2.0) Requirement already satisfied: typing_extensions in /usr/local/lib/python3.11/dist-packages (from lightning-utilities>=0.8.0->torchmetrics) (4.14.1) Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (3.18.0) Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (3.5) Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (3.1.6) Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (2025.3.0) Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (12.4.127) Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (12.4.127) Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (12.4.127) Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (9.1.0.70) Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (12.4.5.8) Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (11.2.1.3) Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (10.3.5.147) Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (11.6.1.9) Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (12.3.1.170) Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (0.6.2) Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (2.21.5) Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (12.4.127) Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (12.4.127) Requirement already satisfied: triton==3.2.0 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (3.2.0) Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (1.13.1) Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=2.0.0->torchmetrics) (1.3.0) Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch>=2.0.0->torchmetrics) (3.0.2)
In [ ]:
# @title S8
from tqdm.auto import tqdm
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau
import torch
import matplotlib.pyplot as plt
import time
import os
In [ ]:
# @title S9
plt.style.use("ggplot")
seed = 42
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
In [ ]:
# @title S10
# Function for running training iterations.
def train(train_data_loader, model):
print("Training")
model.train()
# initialize tqdm progress bar
prog_bar = tqdm(train_data_loader, total=len(train_data_loader))
for i, data in enumerate(prog_bar):
optimizer.zero_grad()
images, targets = data
images = list(image.to(DEVICE) for image in images)
targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]
loss_dict = model(images, targets)
losses = sum(loss for loss in loss_dict.values())
loss_value = losses.item()
train_loss_hist.send(loss_value)
losses.backward()
optimizer.step()
# update the loss value beside the progress bar for each iteration
prog_bar.set_description(desc=f"Loss: {loss_value:.4f}")
return loss_value
In [ ]:
# @title S11
from tqdm.auto import tqdm
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau
import torch
import matplotlib.pyplot as plt
import time
import os
import torchvision
from functools import partial
from torchvision.models.detection import RetinaNet_ResNet50_FPN_V2_Weights
from torchvision.models.detection.retinanet import RetinaNetClassificationHead
import cv2
import numpy as np
import glob
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from google.colab.patches import cv2_imshow # Import cv2_imshow
# Define functions here
def collate_fn(batch):
"""
To handle the data loading as different images may have different
numbers of objects, and to handle varying-size tensors as well.
"""
return tuple(zip(*batch))
def get_train_transform():
# We keep "pascal_voc" because bounding box format is [x_min, y_min, x_max, y_max].
return A.Compose(
[
A.HorizontalFlip(p=0.5),
A.VerticalFlip(p=0.5),
A.Rotate(limit=45),
A.Blur(blur_limit=3, p=0.2),
A.MotionBlur(blur_limit=3, p=0.1),
A.MedianBlur(blur_limit=3, p=0.1),
A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.3),
A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, p=0.3),
A.RandomScale(scale_limit=0.2, p=0.3),
ToTensorV2(p=1.0),
],
bbox_params={"format": "pascal_voc", "label_fields": ["labels"]},
)
def get_valid_transform():
return A.Compose(
[
ToTensorV2(p=1.0),
],
bbox_params={"format": "pascal_voc", "label_fields": ["labels"]},
)
class CustomDataset(Dataset):
def __init__(self, dir_path, width, height, classes, transforms=None):
"""
:param dir_path: Directory containing 'images/' and 'labels/' subfolders.
:param width: Resized image width.
:param height: Resized image height.
:param classes: List of class names (or an indexing scheme).
:param transforms: Albumentations transformations to apply.
"""
self.transforms = transforms
self.dir_path = dir_path
self.image_dir = os.path.join(self.dir_path, "images")
self.label_dir = os.path.join(self.dir_path, "labels")
self.width = width
self.height = height
self.classes = classes
# Gather all image paths
self.image_file_types = ["*.jpg", "*.jpeg", "*.png", "*.ppm", "*.JPG"]
self.all_image_paths = []
for file_type in self.image_file_types:
self.all_image_paths.extend(glob.glob(os.path.join(self.image_dir, file_type)))
# Sort for consistent ordering
self.all_image_paths = sorted(self.all_image_paths)
self.all_image_names = [os.path.basename(img_p) for img_p in self.all_image_paths]
def __len__(self):
return len(self.all_image_paths)
def __getitem__(self, idx):
# 1) Read image
image_name = self.all_image_names[idx]
image_path = os.path.join(self.image_dir, image_name)
label_filename = os.path.splitext(image_name)[0] + ".txt"
label_path = os.path.join(self.label_dir, label_filename)
image = cv2.imread(image_path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
# 2) Resize image (to the model's expected size)
image_resized = cv2.resize(image, (self.width, self.height))
image_resized /= 255.0 # Scale pixel values to [0, 1]
# 3) Read bounding boxes (normalized) from .txt file
boxes = []
labels = []
if os.path.exists(label_path):
with open(label_path, "r") as f:
lines = f.readlines()
for line in lines:
line = line.strip()
if not line:
continue
# Format: class_id x_min y_min x_max y_max (all in [0..1])
parts = line.split()
class_id = int(parts[0]) # e.g. 0, 1, 2, ...
# Example: if you want class IDs to start at 1 for foreground
# and background=0, do:
label_idx = class_id + 1
xmin = float(parts[1])
ymin = float(parts[2])
xmax = float(parts[3])
ymax = float(parts[4])
# Convert normalized coords to absolute (in resized space)
x_min_final = xmin * self.width
y_min_final = ymin * self.height
x_max_final = xmax * self.width
y_max_final = ymax * self.height
# Ensure valid box
if x_max_final <= x_min_final:
x_max_final = x_min_final + 1
if y_max_final <= y_min_final:
y_max_final = y_min_final + 1
# Clip if out of bounds
x_min_final = max(0, min(x_min_final, self.width - 1))
x_max_final = max(0, min(x_max_final, self.width))
y_min_final = max(0, min(y_min_final, self.height - 1))
y_max_final = max(0, min(y_max_final, self.height))
boxes.append([x_min_final, y_min_final, x_max_final, y_max_final])
labels.append(label_idx)
# 4) Convert boxes & labels to Torch tensors
if len(boxes) == 0:
boxes = torch.zeros((0, 4), dtype=torch.float32)
labels = torch.zeros((0,), dtype=torch.int64)
else:
boxes = torch.tensor(boxes, dtype=torch.float32)
labels = torch.tensor(labels, dtype=torch.int64)
# 5) Prepare the target dict
area = (
(boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
if len(boxes) > 0
else torch.tensor([], dtype=torch.float32)
)
iscrowd = torch.zeros((len(boxes),), dtype=torch.int64)
image_id = torch.tensor([idx])
target = {"boxes": boxes, "labels": labels, "area": area, "iscrowd": iscrowd, "image_id": image_id}
# 6) Albumentations transforms: pass Python lists, not Tensors
if self.transforms:
bboxes_list = boxes.cpu().numpy().tolist() # shape: list of [xmin, ymin, xmax, ymax]
labels_list = labels.cpu().numpy().tolist() # shape: list of ints
transformed = self.transforms(
image=image_resized,
bboxes=bboxes_list,
labels=labels_list,
)
# Reassign the image
image_resized = transformed["image"]
# Convert bboxes back to Torch Tensors
new_bboxes_list = transformed["bboxes"] # list of [xmin, ymin, xmax, ymax]
new_labels_list = transformed["labels"] # list of int
if len(new_bboxes_list) > 0:
new_bboxes = torch.tensor(new_bboxes_list, dtype=torch.float32)
new_labels = torch.tensor(new_labels_list, dtype=torch.int64)
else:
new_bboxes = torch.zeros((0, 4), dtype=torch.float32)
new_labels = torch.zeros((0,), dtype=torch.int64)
target["boxes"] = new_bboxes
target["labels"] = new_labels
return image_resized, target
# ---------------------------------------------------------
# Create train/valid datasets and loaders
# ---------------------------------------------------------
def create_train_dataset(DIR, RESIZE_TO, CLASSES, get_train_transform):
train_dataset = CustomDataset(
dir_path=DIR, width=RESIZE_TO, height=RESIZE_TO, classes=CLASSES, transforms=get_train_transform()
)
return train_dataset
def create_valid_dataset(DIR, RESIZE_TO, CLASSES, get_valid_transform):
valid_dataset = CustomDataset(
dir_path=DIR, width=RESIZE_TO, height=RESIZE_TO, classes=CLASSES, transforms=get_valid_transform()
)
return valid_dataset
def create_train_loader(train_dataset, BATCH_SIZE, NUM_WORKERS, collate_fn):
train_loader = DataLoader(
train_dataset,
batch_size=BATCH_SIZE,
shuffle=True,
num_workers=NUM_WORKERS,
collate_fn=collate_fn,
drop_last=True,
)
return train_loader
def create_valid_loader(valid_dataset, BATCH_SIZE, NUM_WORKERS, collate_fn):
valid_loader = DataLoader(
valid_dataset,
batch_size=BATCH_SIZE,
shuffle=False,
num_workers=NUM_WORKERS,
collate_fn=collate_fn,
drop_last=True,
)
return valid_loader
class Averager:
"""
A class to keep track of running average of values (e.g. training loss).
"""
def __init__(self):
self.current_total = 0.0
self.iterations = 0.0
def send(self, value):
self.current_total += value
self.iterations += 1
@property
def value(self):
if self.iterations == 0:
return 0
else:
return self.current_total / self.iterations
def reset(self):
self.current_total = 0.0
self.iterations = 0.0
class SaveBestModel:
"""
Saves the model if the current epoch's validation mAP is higher
than all previously observed values.
"""
def __init__(self, best_valid_map=float(0)):
self.best_valid_map = best_valid_map
def __call__(
self,
model,
current_valid_map,
epoch,
OUT_DIR,
):
if current_valid_map > self.best_valid_map:
self.best_valid_map = current_valid_map
print(f"\nBEST VALIDATION mAP: {self.best_valid_map}")
print(f"SAVING BEST MODEL FOR EPOCH: {epoch+1}\n")
torch.save(
{
"epoch": epoch + 1,
"model_state_dict": model.state_dict(),
},
f"{OUT_DIR}/best_model.pth",
)
def save_model(epoch, model, optimizer, OUT_DIR):
"""
Save the trained model (state dict) and optimizer state to disk.
"""
torch.save(
{
"epoch": epoch + 1,
"model_state_dict": model.state_dict(),
"optimizer_state_dict": optimizer.state_dict(),
},
f"{OUT_DIR}/last_model.pth",
)
def save_loss_plot(OUT_DIR, train_loss_list, x_label="iterations", y_label="train loss", save_name="train_loss"):
"""
Saves the training loss curve.
"""
plt.figure(figsize=(10, 7))
plt.plot(train_loss_list, color="tab:blue")
plt.xlabel(x_label)
plt.ylabel(y_label)
plt.savefig(f"{OUT_DIR}/{save_name}.png")
# plt.close()
print("SAVING PLOTS COMPLETE...")
def save_mAP(OUT_DIR, map_05, map):
"""
Saves the mAP@0.5 and mAP@0.5:0.95 curves per epoch.
"""
plt.figure(figsize=(10, 7))
plt.plot(map_05, color="tab:orange", linestyle="-", label="mAP@0.5")
plt.plot(map, color="tab:red", linestyle="-", label="mAP@0.5:0.95")
plt.xlabel("Epochs")
plt.ylabel("mAP")
plt.legend()
plt.savefig(f"{OUT_DIR}/map.png")
# plt.close()
print("SAVING mAP PLOTS COMPLETE...")
def create_model(num_classes=91):
"""
Creates a RetinaNet-ResNet50-FPN v2 model pre-trained on COCO.
Replaces the classification head for the required number of classes.
"""
model = torchvision.models.detection.retinanet_resnet50_fpn_v2(weights=RetinaNet_ResNet50_FPN_V2_Weights.COCO_V1)
num_anchors = model.head.classification_head.num_anchors
# Replace the classification head
model.head.classification_head = RetinaNetClassificationHead(
in_channels=256, num_anchors=num_anchors, num_classes=num_classes, norm_layer=partial(torch.nn.GroupNorm, 32)
)
return model
def train(train_data_loader, model, optimizer, train_loss_hist, DEVICE):
print("Training")
model.train()
# initialize tqdm progress bar
prog_bar = tqdm(train_data_loader, total=len(train_data_loader))
for i, data in enumerate(prog_bar):
optimizer.zero_grad()
images, targets = data
images = list(image.to(DEVICE) for image in images)
targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]
loss_dict = model(images, targets)
losses = sum(loss for loss in loss_dict.values())
loss_value = losses.item()
train_loss_hist.send(loss_value)
losses.backward()
optimizer.step()
# update the loss value beside the progress bar for each iteration
prog_bar.set_description(desc=f"Loss: {loss_value:.4f}")
return loss_value
def validate(valid_loader, model, DEVICE):
"""
Function to perform validation on the validation dataset.
Returns the mAP values.
"""
print("Validating")
# Initialize the metric.
metric = MeanAveragePrecision()
# Set the model to evaluation mode.
model.eval()
# Initialize tqdm progress bar.
prog_bar = tqdm(valid_loader, total=len(valid_loader))
with torch.no_grad():
for i, data in enumerate(prog_bar):
images, targets = data
images = list(image.to(DEVICE) for image in images)
targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]
outputs = model(images)
# For torchmetrics, we need to format the predictions and targets.
# Predictions should be a list of dicts, each dict containing:
# boxes (FloatTensor[N, 4]), scores (FloatTensor[N]), labels (IntTensor[N])
# Targets should also be a list of dicts, each dict containing:
# boxes (FloatTensor[M, 4]), labels (IntTensor[M])
formatted_preds = []
formatted_targets = []
for j in range(len(outputs)):
formatted_preds.append({
"boxes": outputs[j]["boxes"],
"scores": outputs[j]["scores"],
"labels": outputs[j]["labels"],
})
formatted_targets.append({
"boxes": targets[j]["boxes"],
"labels": targets[j]["labels"],
})
metric.update(formatted_preds, formatted_targets)
# Compute the metrics.
metric_summary = metric.compute()
return metric_summary
def show_tranformed_image(train_loader, CLASSES, DEVICE):
"""
Visualize transformed images from the `train_loader` for debugging.
Only runs if `VISUALIZE_TRANSFORMED_IMAGES = True` in config.py.
"""
if len(train_loader) > 0:
for i in range(2):
images, targets = next(iter(train_loader))
images = list(image.to(DEVICE) for image in images)
targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]
for i in range(len(images)):
if len(targets[i]["boxes"]) == 0:
continue
boxes = targets[i]["boxes"].cpu().numpy().astype(np.int32)
labels = targets[i]["labels"].cpu().numpy().astype(np.int32)
sample = images[i].permute(1, 2, 0).cpu().numpy()
sample = cv2.cvtColor(sample, cv2.COLOR_RGB2BGR)
for box_num, box in enumerate(boxes):
cv2.rectangle(sample, (box[0], box[1]), (box[2], box[3]), (0, 0, 255), 2)
cv2.putText(
sample,
CLASSES[labels[box_num]],
(box[0], box[1] - 10),
cv2.FONT_HERSHEY_SIMPLEX,
1.0,
(0, 0, 255),
2,
)
cv2_imshow(sample)
cv2.waitKey(0)
cv2.destroyAllWindows()
if __name__ == "__main__":
os.makedirs("outputs", exist_ok=True)
train_dataset = create_train_dataset(TRAIN_DIR, RESIZE_TO, CLASSES, get_train_transform)
valid_dataset = create_valid_dataset(VALID_DIR, RESIZE_TO, CLASSES, get_valid_transform)
train_loader = create_train_loader(train_dataset, BATCH_SIZE, NUM_WORKERS, collate_fn)
valid_loader = create_valid_loader(valid_dataset, BATCH_SIZE, NUM_WORKERS, collate_fn)
print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of validation samples: {len(valid_dataset)}\n")
# Initialize the model and move to the computation device.
model = create_model(num_classes=NUM_CLASSES)
model = model.to(DEVICE)
print(model)
# Total parameters and trainable parameters.
total_params = sum(p.numel() for p in model.parameters())
print(f"{total_params:,} total parameters.")
total_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"{total_trainable_params:,} training parameters.")
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.01, momentum=0.9, nesterov=True, weight_decay=0.0005)
scheduler = ReduceLROnPlateau(
optimizer,
mode="max", # we want to maximize mAP
factor=0.1, # reduce LR by this factor
patience=8, # wait 3 epochs with no improvement
threshold=0.005, # how much improvement is considered significant
cooldown=1,
)
# To monitor training loss
train_loss_hist = Averager()
# To store training loss and mAP values.
train_loss_list = []
map_50_list = []
map_list = []
# Mame to save the trained model with.
MODEL_NAME = "model"
# Whether to show transformed images from data loader or not.
if VISUALIZE_TRANSFORMED_IMAGES:
show_tranformed_image(train_loader, CLASSES, DEVICE)
# To save best model.
save_best_model = SaveBestModel()
metric = MeanAveragePrecision()
metric.warn_on_many_detections = False
# Training loop.
for epoch in range(NUM_EPOCHS):
print(f"\nEPOCH {epoch+1} of {NUM_EPOCHS}")
# Reset the training loss histories for the current epoch.
train_loss_hist.reset()
# Start timer and carry out training and validation.
start = time.time()
train_loss = train(train_loader, model, optimizer, train_loss_hist, DEVICE)
metric_summary = validate(valid_loader, model, DEVICE)
current_map_05_95 = float(metric_summary["map"])
current_map_05 = float(metric_summary["map_50"])
print(f"Epoch #{epoch+1} train loss: {train_loss_hist.value:.3f}")
print(f"Epoch #{epoch+1} mAP: {metric_summary['map']:.3f}")
end = time.time()
print(f"Took {((end - start) / 60):.3f} minutes for epoch {epoch+1}")
train_loss_list.append(train_loss)
map_50_list.append(metric_summary["map_50"])
map_list.append(metric_summary["map"])
# save the best model till now.
save_best_model(model, float(metric_summary["map"]), epoch, "outputs")
# Save the current epoch model.
save_model(epoch, model, optimizer, OUT_DIR)
# Save loss plot.
save_loss_plot(OUT_DIR, train_loss_list)
# Save mAP plot.
save_mAP(OUT_DIR, map_50_list, map_list)
scheduler.step(current_map_05_95)
print("Current LR:", scheduler.get_last_lr())
/usr/local/lib/python3.11/dist-packages/albumentations/core/composition.py:331: UserWarning: Got processor for bboxes, but no transform to process it. self._set_keys() /usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py:624: UserWarning: This DataLoader will create 4 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary. warnings.warn(
Number of training samples: 1052
Number of validation samples: 225
RetinaNet(
(backbone): BackboneWithFPN(
(body): IntermediateLayerGetter(
(conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(layer1): Sequential(
(0): Bottleneck(
(conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): Bottleneck(
(conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(2): Bottleneck(
(conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
)
(layer2): Sequential(
(0): Bottleneck(
(conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(2): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(3): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
)
(layer3): Sequential(
(0): Bottleneck(
(conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(2): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(3): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(4): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(5): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
)
(layer4): Sequential(
(0): Bottleneck(
(conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): Bottleneck(
(conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(2): Bottleneck(
(conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
)
)
(fpn): FeaturePyramidNetwork(
(inner_blocks): ModuleList(
(0): Conv2dNormActivation(
(0): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
)
(1): Conv2dNormActivation(
(0): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
)
(2): Conv2dNormActivation(
(0): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
)
)
(layer_blocks): ModuleList(
(0-2): 3 x Conv2dNormActivation(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
)
(extra_blocks): LastLevelP6P7(
(p6): Conv2d(2048, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(p7): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
)
)
)
(anchor_generator): AnchorGenerator()
(head): RetinaNetHead(
(classification_head): RetinaNetClassificationHead(
(conv): Sequential(
(0): Conv2dNormActivation(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): GroupNorm(32, 256, eps=1e-05, affine=True)
(2): ReLU(inplace=True)
)
(1): Conv2dNormActivation(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): GroupNorm(32, 256, eps=1e-05, affine=True)
(2): ReLU(inplace=True)
)
(2): Conv2dNormActivation(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): GroupNorm(32, 256, eps=1e-05, affine=True)
(2): ReLU(inplace=True)
)
(3): Conv2dNormActivation(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): GroupNorm(32, 256, eps=1e-05, affine=True)
(2): ReLU(inplace=True)
)
)
(cls_logits): Conv2d(256, 45, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(regression_head): RetinaNetRegressionHead(
(conv): Sequential(
(0): Conv2dNormActivation(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): GroupNorm(32, 256, eps=1e-05, affine=True)
(2): ReLU(inplace=True)
)
(1): Conv2dNormActivation(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): GroupNorm(32, 256, eps=1e-05, affine=True)
(2): ReLU(inplace=True)
)
(2): Conv2dNormActivation(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): GroupNorm(32, 256, eps=1e-05, affine=True)
(2): ReLU(inplace=True)
)
(3): Conv2dNormActivation(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): GroupNorm(32, 256, eps=1e-05, affine=True)
(2): ReLU(inplace=True)
)
)
(bbox_reg): Conv2d(256, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
)
(transform): GeneralizedRCNNTransform(
Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
Resize(min_size=(800,), max_size=1333, mode='bilinear')
)
)
36,414,865 total parameters.
36,189,521 training parameters.
EPOCH 1 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
/usr/local/lib/python3.11/dist-packages/torchmetrics/utilities/prints.py:43: UserWarning: Encountered more than 100 detections in a single image. This means that certain detections with the lowest scores will be ignored, that may have an undesirable impact on performance. Please consider adjusting the `max_detection_threshold` to suit your use case. To disable this warning, set attribute class `warn_on_many_detections=False`, after initializing the metric. warnings.warn(*args, **kwargs)
Epoch #1 train loss: 1.812 Epoch #1 mAP: 0.000 Took 4.808 minutes for epoch 1 BEST VALIDATION mAP: 5.263404455035925e-05 SAVING BEST MODEL FOR EPOCH: 1 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.01] EPOCH 2 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #2 train loss: 1.563 Epoch #2 mAP: 0.000 Took 4.366 minutes for epoch 2 BEST VALIDATION mAP: 8.852824248606339e-05 SAVING BEST MODEL FOR EPOCH: 2 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.01] EPOCH 3 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #3 train loss: 1.488 Epoch #3 mAP: 0.000 Took 4.335 minutes for epoch 3 BEST VALIDATION mAP: 0.00014105642912909389 SAVING BEST MODEL FOR EPOCH: 3 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.01] EPOCH 4 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #4 train loss: 1.443 Epoch #4 mAP: 0.002 Took 3.645 minutes for epoch 4 BEST VALIDATION mAP: 0.0015071509405970573 SAVING BEST MODEL FOR EPOCH: 4 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.01] EPOCH 5 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #5 train loss: 1.438 Epoch #5 mAP: 0.001 Took 5.052 minutes for epoch 5 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.01] EPOCH 6 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #6 train loss: 1.407 Epoch #6 mAP: 0.001 Took 4.032 minutes for epoch 6 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.01] EPOCH 7 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #7 train loss: 1.401 Epoch #7 mAP: 0.003 Took 3.664 minutes for epoch 7 BEST VALIDATION mAP: 0.002860076492652297 SAVING BEST MODEL FOR EPOCH: 7 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.01] EPOCH 8 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #8 train loss: 1.381 Epoch #8 mAP: 0.005 Took 5.010 minutes for epoch 8 BEST VALIDATION mAP: 0.005273118149489164 SAVING BEST MODEL FOR EPOCH: 8 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.01] EPOCH 9 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #9 train loss: 1.355 Epoch #9 mAP: 0.002 Took 4.375 minutes for epoch 9 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.01] EPOCH 10 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #10 train loss: 1.346 Epoch #10 mAP: 0.007 Took 4.368 minutes for epoch 10 BEST VALIDATION mAP: 0.0073433914221823215 SAVING BEST MODEL FOR EPOCH: 10 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.01] EPOCH 11 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #11 train loss: 1.319 Epoch #11 mAP: 0.002 Took 4.362 minutes for epoch 11
/tmp/ipython-input-3538803861.py:305: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`). Consider using `matplotlib.pyplot.close()`. plt.figure(figsize=(10, 7))
SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.01] EPOCH 12 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #12 train loss: 1.410 Epoch #12 mAP: 0.002 Took 4.331 minutes for epoch 12 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.01] EPOCH 13 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #13 train loss: 1.336 Epoch #13 mAP: 0.001 Took 3.660 minutes for epoch 13 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.01] EPOCH 14 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #14 train loss: 1.311 Epoch #14 mAP: 0.003 Took 5.014 minutes for epoch 14 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.01] EPOCH 15 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #15 train loss: 1.300 Epoch #15 mAP: 0.004 Took 4.362 minutes for epoch 15 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.01] EPOCH 16 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #16 train loss: 1.279 Epoch #16 mAP: 0.003 Took 4.361 minutes for epoch 16 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.01] EPOCH 17 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #17 train loss: 1.274 Epoch #17 mAP: 0.004 Took 3.678 minutes for epoch 17 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.01] EPOCH 18 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #18 train loss: 1.261 Epoch #18 mAP: 0.004 Took 4.991 minutes for epoch 18 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.01] EPOCH 19 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #19 train loss: 1.247 Epoch #19 mAP: 0.004 Took 4.360 minutes for epoch 19 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.001] EPOCH 20 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #20 train loss: 1.207 Epoch #20 mAP: 0.003 Took 3.688 minutes for epoch 20 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.001] EPOCH 21 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #21 train loss: 1.197 Epoch #21 mAP: 0.003 Took 4.997 minutes for epoch 21 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.001] EPOCH 22 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #22 train loss: 1.188 Epoch #22 mAP: 0.003 Took 4.356 minutes for epoch 22 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.001] EPOCH 23 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #23 train loss: 1.189 Epoch #23 mAP: 0.004 Took 3.693 minutes for epoch 23 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.001] EPOCH 24 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #24 train loss: 1.175 Epoch #24 mAP: 0.003 Took 4.989 minutes for epoch 24 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.001] EPOCH 25 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #25 train loss: 1.171 Epoch #25 mAP: 0.003 Took 4.369 minutes for epoch 25 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.001] EPOCH 26 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #26 train loss: 1.170 Epoch #26 mAP: 0.004 Took 3.681 minutes for epoch 26 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.001] EPOCH 27 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #27 train loss: 1.171 Epoch #27 mAP: 0.003 Took 5.019 minutes for epoch 27 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.001] EPOCH 28 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #28 train loss: 1.161 Epoch #28 mAP: 0.004 Took 4.372 minutes for epoch 28 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.001] EPOCH 29 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #29 train loss: 1.160 Epoch #29 mAP: 0.004 Took 3.657 minutes for epoch 29 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.0001] EPOCH 30 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #30 train loss: 1.158 Epoch #30 mAP: 0.003 Took 5.031 minutes for epoch 30 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.0001] EPOCH 31 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #31 train loss: 1.158 Epoch #31 mAP: 0.003 Took 4.328 minutes for epoch 31 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.0001] EPOCH 32 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #32 train loss: 1.157 Epoch #32 mAP: 0.003 Took 3.662 minutes for epoch 32 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.0001] EPOCH 33 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #33 train loss: 1.151 Epoch #33 mAP: 0.003 Took 4.997 minutes for epoch 33 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.0001] EPOCH 34 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #34 train loss: 1.162 Epoch #34 mAP: 0.003 Took 3.671 minutes for epoch 34 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.0001] EPOCH 35 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #35 train loss: 1.156 Epoch #35 mAP: 0.003 Took 4.679 minutes for epoch 35 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.0001] EPOCH 36 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #36 train loss: 1.154 Epoch #36 mAP: 0.003 Took 4.659 minutes for epoch 36 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.0001] EPOCH 37 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #37 train loss: 1.156 Epoch #37 mAP: 0.003 Took 3.681 minutes for epoch 37 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.0001] EPOCH 38 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #38 train loss: 1.155 Epoch #38 mAP: 0.003 Took 5.017 minutes for epoch 38 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [0.0001] EPOCH 39 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #39 train loss: 1.153 Epoch #39 mAP: 0.003 Took 4.379 minutes for epoch 39 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [1e-05] EPOCH 40 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #40 train loss: 1.159 Epoch #40 mAP: 0.003 Took 3.649 minutes for epoch 40 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [1e-05] EPOCH 41 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #41 train loss: 1.152 Epoch #41 mAP: 0.003 Took 5.027 minutes for epoch 41 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [1e-05] EPOCH 42 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #42 train loss: 1.156 Epoch #42 mAP: 0.003 Took 3.686 minutes for epoch 42 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [1e-05] EPOCH 43 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #43 train loss: 1.164 Epoch #43 mAP: 0.003 Took 5.008 minutes for epoch 43 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [1e-05] EPOCH 44 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #44 train loss: 1.153 Epoch #44 mAP: 0.003 Took 3.663 minutes for epoch 44 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [1e-05] EPOCH 45 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #45 train loss: 1.155 Epoch #45 mAP: 0.003 Took 4.987 minutes for epoch 45 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [1e-05] EPOCH 46 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #46 train loss: 1.158 Epoch #46 mAP: 0.003 Took 3.699 minutes for epoch 46 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [1e-05] EPOCH 47 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #47 train loss: 1.158 Epoch #47 mAP: 0.003 Took 4.712 minutes for epoch 47 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [1e-05] EPOCH 48 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #48 train loss: 1.155 Epoch #48 mAP: 0.003 Took 4.690 minutes for epoch 48 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [1e-05] EPOCH 49 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #49 train loss: 1.157 Epoch #49 mAP: 0.003 Took 3.674 minutes for epoch 49 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [1.0000000000000002e-06] EPOCH 50 of 50 Training
0%| | 0/131 [00:00<?, ?it/s]
Validating
0%| | 0/28 [00:00<?, ?it/s]
Epoch #50 train loss: 1.159 Epoch #50 mAP: 0.003 Took 5.030 minutes for epoch 50 SAVING PLOTS COMPLETE... SAVING mAP PLOTS COMPLETE... Current LR: [1.0000000000000002e-06]